from __future__ import division
from operator import itemgetter, attrgetter

import gc
import math
import matplotlib
import os
import pylab
import random
import sys
import time
from sets import Set
from scipy import stats
import numpy as np

# Posting key (first space-separated field of a line in the popped-postings
# file) -> number of lines on which that key was seen.
top10PostingDict = dict()

# Running count of document results for which every "termID_docID" key is
# present in top10PostingDict.
numOfDocumentResultRetained = 0
# inputFileName2 = "/home/diaosi/web-search-engine-wei/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_1_20140211Night_DEBUG_1%"
# inputFileName2 = "/home/diaosi/web-search-engine-wei/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_0_20140217Night_DEBUG_1%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_5_20140217Night_DEBUG_1%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_5_20140217Night_DEBUG_3%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_5_20140217Night_DEBUG_5%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_5_20140218Morning_DEBUG_10%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_5_20140218Morning_DEBUG_15%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_5_20140218Morning_DEBUG_20%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_5_20140218Morning_DEBUG_30%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_5_20140218Morning_DEBUG_40%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_5_20140218Morning_DEBUG_50%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140218Afternoon_DEBUG_1%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140218Afternoon_DEBUG_3%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140218Afternoon_DEBUG_5%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140218Afternoon_DEBUG_10%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_0_20140219Afternoon_DEBUG_1%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_0_20140219Afternoon_DEBUG_3%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_0_20140219Afternoon_DEBUG_5%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_0_20140219Afternoon_DEBUG_10%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_0_20140219Afternoon_DEBUG_15%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_0_20140219Afternoon_DEBUG_20%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_0_20140219Afternoon_DEBUG_30%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_0_20140219Afternoon_DEBUG_40%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_0_20140219Afternoon_DEBUG_50%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140220Morning_DEBUG_1%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140220Morning_DEBUG_3%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140220Morning_DEBUG_5%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140220Morning_DEBUG_10%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140220Morning_DEBUG_15%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140220Morning_DEBUG_20%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140220Morning_DEBUG_30%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140220Morning_DEBUG_40%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_10_20140220Morning_DEBUG_50%"

# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_20_20140221Afternoon_DEBUG_1%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_20_20140221Afternoon_DEBUG_3%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_20_20140221Afternoon_DEBUG_5%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_20_20140221Afternoon_DEBUG_10%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_20_20140221Afternoon_DEBUG_15%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_20_20140221Afternoon_DEBUG_20%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_20_20140221Afternoon_DEBUG_30%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_20_20140221Afternoon_DEBUG_40%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_20_20140221Afternoon_DEBUG_50%"

# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_30_20140221Afternoon_DEBUG_1%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_30_20140221Afternoon_DEBUG_3%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_30_20140221Afternoon_DEBUG_5%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_30_20140221Afternoon_DEBUG_10%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_30_20140221Afternoon_DEBUG_15%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_30_20140221Afternoon_DEBUG_20%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_30_20140221Afternoon_DEBUG_30%"
# inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_30_20140221Afternoon_DEBUG_40%"
inputFileName2 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/importantPostingsBeingPoppedAtDifferentLevels_weight_30_20140221Afternoon_DEBUG_50%"



print "inputFileName2:",inputFileName2
inputFileHandler = open(inputFileName2,"r")
# ignore the headline
inputFileHandler.readline()
currentLine = inputFileHandler.readline()
while currentLine:
    top10PostingKey = currentLine.strip().split(" ")[0]
    if top10PostingKey not in top10PostingDict:
        top10PostingDict[top10PostingKey] = 1
    else:
        top10PostingDict[top10PostingKey] += 1
    currentLine = inputFileHandler.readline()
print "len(top10PostingDict): ",len(top10PostingDict)
inputFileHandler.close()

documentResultDict = {}
inputFileName1 = "/data/obukai/workspace_USE_SINCE_20140217Night/web-search-engine-wei-2014-March/polyIRIndexer/tail5KResults_sortedByQID_NEW_FORMAT_20140210Afternoon"
inputFileHandler = open(inputFileName1,"r")
currentLine = inputFileHandler.readline()
while currentLine:
    currentLineElements = currentLine.strip().split(" ")
    qIDInStringFormat = currentLineElements[0]
    docIDInStringFormat = currentLineElements[1]
    termIDInStringFormat = currentLineElements[2]
    documentResultKey = qIDInStringFormat + "_" + docIDInStringFormat 
    if documentResultKey not in documentResultDict:
        documentResultDict[documentResultKey] = []
        documentResultDict[documentResultKey].append(termIDInStringFormat)
    else:
        documentResultDict[documentResultKey].append(termIDInStringFormat)
    currentLine = inputFileHandler.readline()
print "len(documentResultDict): ",len(documentResultDict)
inputFileHandler.close()

for documentResultKey in documentResultDict:
    docID = documentResultKey.strip().split("_")[1]
    currentDocumentResultRetainedFlag = True
    for termID in documentResultDict[documentResultKey]:
        top10PostingKey = termID + "_" + docID 
        if top10PostingKey not in top10PostingDict:
            currentDocumentResultRetainedFlag = False
    if not currentDocumentResultRetainedFlag:
        pass
    else:
        numOfDocumentResultRetained += 1
print "numOfDocumentResultRetained: ",numOfDocumentResultRetained
exit(1)


